#! /usr/bin/env perl

###########################################################################
#
# vfind
#
###########################################################################
#
# This command performs recursive searches on files and directories in the repository, ignoring the same files
# that VC commands would ignore, as well as the VC control directories.  You can use it to find files, find
# directories, or "grep" all files (except that this uses Perl regexen rather than basic or extended ones).
#
# #########################################################################
#
# All the code herein is released under the Artistic License
#		( http://www.perl.com/language/misc/Artistic.html )
# Copyright (c) 2004-2011 Barefoot Software, Copyright (c) 2004-2006 ThinkGeek
#
###########################################################################

use strict;
use warnings;

use Data::Dumper;
use File::Basename;

use VCtools::Base;
use VCtools::Args;
use VCtools::Common;


use constant LONG_LINE => 512;


#################################
# OPTIONS AND ARGUMENTS
#################################

VCtools::switch('current_dir', 'c', 'start from . (default: start from root project dir)');
VCtools::args('dirs', 'optlist', 'directory/ies to search (default: search all from starting dir)');
VCtools::action('print', 'print filename (only)  (this is the default action unless -grep specified)');
VCtools::action('file', 'consider a file only if its basename contains this string', 'string');
VCtools::action('grep', 'search file for (Perl regex) pattern (also prints filename)', 'pattern');
VCtools::action('dirfind', 'find closest matching dirname (exact matches and least depth preferred)', 'dirname');
VCtools::action('emptydir', 'find all empty directories');
VCtools::action('nocomments', 'try to ignore comments with -grep (may not always work)--CURRENTLY EXTREMELY SLOW!');
VCtools::action('nolonglines', 'ignore lines over ' . LONG_LINE . ' chars with -grep');
VCtools::action('codeonly', 'ignore non-code file patterns (as defined in config)');
VCtools::getopts();
print STDERR "finished with opts\n" if DEBUG >= 5;

# remember, directories are files too
my @files = VCtools::dirs();


#################################
# CHECK FOR ERRORS
#################################

my $proj = VCtools::check_common_errors();

if (!!VCtools::grep() + !!VCtools::dirfind() + !!VCtools::emptydir() > 1)
{
	VCtools::fatal_error("can only supply one of: -grep, -dirfind, -emptydir");
}

my $snoop;
if (VCtools::nocomments())
{
	VCtools::fatal_error("-nocomments doesn't make sense without -grep") unless VCtools::grep();

	eval { require File::Comments } or VCtools::fatal_error("cannot load File::Comments (required for -nocomments)");
	$snoop = File::Comments->new() or VCtools::fatal_error("cannot instantiate File::Comments object");
	print STDERR "got snoop object\n" if DEBUG >= 2;
}

if (VCtools::nolonglines())
{
	VCtools::fatal_error("-nolonglines doesn't make sense without -grep") unless VCtools::grep();
}


#################################
# MAIN
#################################

my $projdir = VCtools::project_dir();
# this helps regex'es go faster
my $projdir_qr = qr[^$projdir/];
print STDERR "figuring projdir is $projdir\n" if DEBUG >= 2;

if (VCtools::current_dir())
{
	if (not @files)
	{
		@files = ('.');
	}
}
else
{
	chdir $projdir or VCtools::fatal_error("can't change to TLD of project");
	@files = ('.') unless @files;
}

# assume false
my $retval = 0;

# do this one outside the loop since it really only needs to be done once
my $grep_pattern = VCtools::grep();
if ($grep_pattern)
{
	eval { $grep_pattern = qr/$grep_pattern/ } or VCtools::fatal_error("illegal grep pattern: $@");
}
print STDERR "set grep pattern to $grep_pattern\n" if DEBUG >= 2;

# ditto
my $dirmatch = VCtools::dirfind();
if ($dirmatch)
{
	print ".\n" and exit if $dirmatch eq '.';							# this makes vcd with no args _much_ faster

	print STDERR "about to eval dirmatch ...\n" if DEBUG >= 5;
	# this shouldn't ever happen, I don't think
	eval { $dirmatch = qr{(?:^|/)$dirmatch(.*)$} } or VCtools::fatal_error("can't turn dirname into a pattern: $@");
}
print STDERR "set dirmatch to $dirmatch\n" if DEBUG >= 2;
our %possible_dirs;

# and ditto again
my $filematch = VCtools::file();
if ($filematch)
{
	# even less likely for this to happen IMHO
	eval { $filematch = qr{[^/]*\Q$filematch\E[^/]*$} } or VCtools::fatal_error("can't turn filename into a pattern: $@");
}

# set up non-code patterns to check against if any and if applicable
my @non_code;
if (VCtools::codeonly())
{
	my $noncode = VCtools::get_proj_directive($proj, NonCodeRegex => []);
	@non_code = ref $noncode ? @$noncode : ($noncode);
}
print STDERR "non-code set to list:\n", join("\n", @non_code), "\n[LIST ENDS]\n" if DEBUG >= 3;


# need this for emptydir
our %dir_filecount;

FILE: foreach (VCtools::get_all_files(@files))
{
	exit if VCtools::pretend();											# no point in going further if running under -p
	next if $filematch and not /$filematch/;
	foreach my $re (@non_code)
	{
		print STDERR "checking $_ against $re\nand it ", /$re/ ? 'passes' : 'fails', "\n" if DEBUG >= 6;
		next FILE if /$re/;
	}

	if ($grep_pattern)
	{
		$retval |= grep_file($_, $grep_pattern);
	}
	elsif ($dirmatch)
	{
		$retval |= dir_find($_, $dirmatch);
	}
	elsif (VCtools::emptydir())
	{
		$retval |= count_files($_);
	}
	else
	{
		# doesn't really matter if they specified -print or not;
		# this is the default action
		$retval |= print_file($_, "\n");
	}
}


# dirfind option doesn't print anything as it goes along, so print result here
if ($dirmatch)
{
	if (%possible_dirs)
	{
		print "possible_dirs: ", Dumper(\%possible_dirs) if DEBUG >= 3;

		my @dirs = sort {
			$possible_dirs{$a} <=> $possible_dirs{$b} || $a cmp $b
		} keys %possible_dirs;
		print "dirs: @dirs\n" if DEBUG >= 2;

		print "$dirs[0]\n";
	}
	else
	{
		# oops, no possibles
		VCtools::warning("no directories found");
	}
}

# ditto for emptydir
if (VCtools::emptydir())
{
	# I hate having to have the blatant ls call in there, but Subversion (at least) refuses to recurse into
	# unknown directories, so we have to (the ls will make it so it won't work on Windows, but I don't know
	# that VCtools has much chance of working on Windows anyway)
	print join("\n", sort grep { $dir_filecount{$_} == 0 && !`ls $_` } keys %dir_filecount), "\n";
}

# if everything is true, that means no errors,
# so reverse the sense of our $retval
exit(not $retval);


#################################
# SUBS
#################################


sub print_file
{
	my ($file, $trailer) = @_;
	$trailer ||= '';

	$file =~ s/$projdir_qr//;
	$file .= "/" if -d $file;
	print "$file$trailer";

	# print is always true
	return 1;
}


sub grep_file
{
	my ($file, $pattern) = @_;
	print STDERR "grep_file($file, $pattern)\n" if DEBUG >= 4;
	my $retval = 0;

	# certainly don't try to grep dirs, pipes, sockets, etc
	return 0 unless -f $file;

	if (-T $file)														# grepping binary files messes up the terminal
	{
		if (VCtools::nocomments())
		{
			my $contents = $snoop->stripped($file);
			if ($contents)
			{
				foreach (split($/, $contents))
				{
					last unless grep_line($file, $pattern, \$retval);
				}
			}
			else
			{
				print STDERR "found empty file $file\n" if DEBUG >= 4;
			}
		}
		else
		{
			if (not open(IN, $file))
			{
				VCtools::warning("can't open file $file");
				return 0;
			}

			while ( <IN> )
			{
				last unless grep_line($file, $pattern, \$retval);
			}
			close(IN);
		}
	}
	else
	{
		VCtools::warning("skipped binary file $file");
	}

	return $retval;
}

# this expects its line to be in $_
sub grep_line
{
	my ($file, $pattern, $retval) = @_;

	# don't bother to check super long lines if the user says not to
	# (but still return 1 so that following lines will be checked)
	return 1 if VCtools::nolonglines() and length > LONG_LINE;

	if ( /$pattern/ )
	{
		$$retval = 1;

		# using -grep and -print at the same time indicates that we only want the file name
		if (VCtools::print())
		{
			print_file($file, "\n");
			return 0;
		}
		else
		{
			print_file($file, ":");
			print;
		}
	}

	return 1;
}


sub dir_find
{
	my ($file, $pattern) = @_;

	return 0 unless -d $file;
	return 0 unless /$dirmatch/;

	# (note: pattern we built for $dirmatch captures any trailing chars as $1, so we can tell exact
	# matches by checking $1 for having zero length)
	my $exact = length($1) == 0;
	
	# count the number of subdirs
	# this allows us to rank choices by how "deep" they are
	my $depth = s@/@/@g || 0;

	# we want exact matches to rank higher than partials
	# thus we add an arbitrary large number to non-exact matches, making them drop low in the sorting
	$possible_dirs{$file} = $depth + !$exact * 100;

	return 1;
}


sub count_files
{
	my ($file) = @_;

	$dir_filecount{$file} += 0 if -d $file;								# make sure every dir has an entry
	return 1 if $file eq '.';											# . isn't a subdir of anything
	++$dir_filecount{dirname($file)};									# but everything else is (even dirs)

	# can't see any way for this to go wrong ...
	return 1;
}
